
 //-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   pixel_shader
//File Name     :   tex_addr.v
//Module name   :   tex_addr
//Full name     :   texel addr generator
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/5/13
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version 
//
//-----------------------------------------------------------------------------  

// tex_addr: texel address generator.
//
// For every (u, v) pair popped from the uv FIFO this module produces the
// memory addresses of the four neighbouring texels
//     texel 0 = (u    , v    )      texel 1 = (u + 1, v    )
//     texel 2 = (u    , v + 1)      texel 3 = (u + 1, v + 1)
// together with four 8-bit bilinear weights derived from the fractional
// parts of u and v.
//
// u and v are 24.8 fixed point: bits [31:8] are the integer texel index,
// bits [7:0] are the fraction.
//
// Pipeline (every stage advances only while addr_translate_en is high):
//   cycle 1   : register v / v+1 / tex_width as multiplier inputs, u/v -> *_ff1
//   cycle 2-3 : mul_24x32 computes v*width and (v+1)*width, u/v -> *_ff2 -> *_ff3,
//               {u}*{v} is computed from *_ff2 and registered alongside *_ff3
//   cycle 4   : addresses and weights are registered onto the outputs
//
// Inputs uv_end and tex_height are part of the port list but are not used by
// any logic in this module.
module tex_addr(
    input             clk,                     // clock
    input             rst_n,                   // asynchronous reset, active low
    input             uv_fifo_empty,           // uv FIFO empty flag
    input      [31:0] u,                       // u from uv FIFO, 24.8 fixed point
    input      [31:0] v,                       // v from uv FIFO, 24.8 fixed point
    input             uv_end,                  // process end (unused in this module)
    output            uv_fifo_rd,              // uv FIFO read strobe (combinational)
    input      [3:0]  tex_type,                // texture type, fixed to RGBA8888
    input      [31:0] tex_width,               // texture width, supports up to 2^24-1
    input      [31:0] tex_height,              // texture height, supports up to 2^24-1 (unused in this module)
    input      [31:0] tex_addr,                // base address of the current texture in memory
    output reg [31:0] block_addr0,             // texel 0 cache line address, for bilinear filter
    output reg [6:0]  in_block_addr0,          // texel 0 offset inside the cache line
    output     [3:0]  textype_out,             // texture type forwarded to the consumer
    output reg [31:0] block_addr1,             // texel 1 cache line address, for bilinear filter
    output reg [6:0]  in_block_addr1,          // texel 1 offset inside the cache line
    output reg [31:0] block_addr2,             // texel 2 cache line address, for bilinear filter
    output reg [6:0]  in_block_addr2,          // texel 2 offset inside the cache line
    output reg [31:0] block_addr3,             // texel 3 cache line address, for bilinear filter
    output reg [6:0]  in_block_addr3,          // texel 3 offset inside the cache line
    output reg [1:0]  addr_fifo_w_en,          // bit[1]: bilinear enable, bit[0]: address valid
    input             addr_fifo_almost_full,   // address FIFO almost full (back-pressure)
    output reg [7:0]  param1,                  // bilinear weight (1-{u})*(1-{v})
    output reg [7:0]  param2,                  // bilinear weight {u}*(1-{v})
    output reg [7:0]  param3,                  // bilinear weight {v}*(1-{u})
    output reg [7:0]  param4,                  // bilinear weight {u}*{v}
    output            param_fifo_w_en,         // parameter FIFO write enable (= addr_fifo_w_en[0])
    output            param_en,                // parameter valid (= addr_fifo_w_en[1])
    input             param_fifo_almost_full   // parameter FIFO almost full (back-pressure)
);

//-----------------------------------------------------------------------------
// Internal signals
//-----------------------------------------------------------------------------
// multiplier operands / strobes
reg  [23:0] mul_in_a0;              // integer part of v
reg  [23:0] mul_in_a2;              // integer part of v, plus one (wraps at 24 bits)
reg  [31:0] mul_in_b;               // registered tex_width
reg         mul_in_en;              // operand valid strobe into the multipliers
wire [55:0] v_x_width;              // v * width
wire [55:0] v_plus_1_x_width;       // (v + 1) * width
wire        mul_out_en;             // product valid strobe from the v*width multiplier
reg         mul_out_en_ff1;         // remembers a product that became valid during a stall

// u / v delay line matching the multiplier latency
reg  [31:0] u_ff1, u_ff2, u_ff3;
reg  [31:0] v_ff1, v_ff2, v_ff3;

// {u} * {v}
wire [15:0] u_dec_x_v_dec;
reg  [15:0] u_dec_x_v_dec_ff1;

//-----------------------------------------------------------------------------
// Pipeline control
//-----------------------------------------------------------------------------
// The whole pipeline advances only when neither downstream FIFO is almost full.
wire addr_translate_en = ~addr_fifo_almost_full && ~param_fifo_almost_full;

// Pop a new (u, v) whenever one is available and the pipeline may advance.
assign uv_fifo_rd = ~uv_fifo_empty && addr_translate_en;

// The texture type was declared as an output but never driven in the original
// code, leaving the port floating. Forward the input so consumers see a
// defined value.
// NOTE(review): a plain pass-through is assumed here, matching how tex_addr and
// tex_width are used unregistered - confirm against the consumer of this port.
assign textype_out = tex_type;

// Latch mul_out_en if it is seen while the pipeline is stalled, and release it
// on the next cycle in which the pipeline advances.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        mul_out_en_ff1 <= 1'b0;
    else if (mul_out_en && ~addr_translate_en)
        mul_out_en_ff1 <= 1'b1;
    else if (addr_translate_en)
        mul_out_en_ff1 <= 1'b0;
end

//****************************************************************************************
// stage 1: v * tex_width and (v+1) * tex_width
//          multiplier inputs are registered, multiplier costs 2 cycles, 3 cycles total
//****************************************************************************************

// registered multiplier inputs
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        begin
            mul_in_a0 <= 24'b0;
            mul_in_a2 <= 24'b0;
            mul_in_b  <= 32'b0;
            mul_in_en <= 1'b0;
        end
    else if (addr_translate_en)
        begin
            mul_in_a0 <= v[31:8];
            mul_in_a2 <= v[31:8] + 1;
            mul_in_b  <= tex_width;
            // Inside this branch addr_translate_en is 1, so uv_fifo_rd is
            // exactly ~uv_fifo_empty: the operands are valid when a pop happens.
            mul_in_en <= uv_fifo_rd;
        end
end

// u / v delay line and registered {u}*{v}
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        begin
            u_ff1 <= 32'b0;
            v_ff1 <= 32'b0;
            u_ff2 <= 32'b0;
            v_ff2 <= 32'b0;
            u_ff3 <= 32'b0;
            v_ff3 <= 32'b0;
            u_dec_x_v_dec_ff1 <= 16'b0;
        end
    else if (addr_translate_en)
        begin
            u_ff1 <= u;
            v_ff1 <= v;
            u_ff2 <= u_ff1;
            v_ff2 <= v_ff1;
            u_ff3 <= u_ff2;
            v_ff3 <= v_ff2;
            // product of the *_ff2 fractions, so it lines up with *_ff3
            u_dec_x_v_dec_ff1 <= u_dec_x_v_dec;
        end
end

// v * width, 2 cycles
mul_24x32 u_v_x_width(
    .clk(clk),
    .rst_n(rst_n),
    .mul_in_a(mul_in_a0),
    .mul_in_b(mul_in_b),
    .mul_in_en(mul_in_en),
    .mul_en(addr_translate_en),
    .mul_out(v_x_width),
    .mul_out_en(mul_out_en)
);

// (v + 1) * width, 2 cycles
// Its valid strobe is left open: both multipliers share the same input strobe
// and enable, so only the strobe of u_v_x_width is used.
mul_24x32 u_v_plus_1_x_width(
    .clk(clk),
    .rst_n(rst_n),
    .mul_in_a(mul_in_a2),
    .mul_in_b(mul_in_b),
    .mul_in_en(mul_in_en),
    .mul_en(addr_translate_en),
    .mul_out(v_plus_1_x_width),
    .mul_out_en()
);

// {u} * {v} for the bilinear parameters
mult_8x8 u_u_dec_x_v_dec(
    .a(u_ff2[7:0]),
    .b(v_ff2[7:0]),
    .out(u_dec_x_v_dec)
);

//******************************
// stage 2: addresses and parameters
//******************************

// integer u + 1 for the right-hand neighbours (wraps at 24 bits)
wire [23:0] uplus1 = u_ff3[31:8] + 1;

// Linear texel index (in texels) of each neighbour: column + row * width.
// Only the low 32 bits of the 56-bit products are used.
wire [31:0] texel_pos_0 = {8'b0, u_ff3[31:8]} + v_x_width[31:0];
wire [31:0] texel_pos_1 = {8'b0, uplus1}      + v_x_width[31:0];
wire [31:0] texel_pos_2 = {8'b0, u_ff3[31:8]} + v_plus_1_x_width[31:0];
wire [31:0] texel_pos_3 = {8'b0, uplus1}      + v_plus_1_x_width[31:0];

// 16-bit (8.8 scaled) bilinear weights.
// With U = u_ff3[7:0] and V = v_ff3[7:0]:
//   param1_16 = 65536 - 256*U - 256*V + U*V = (256-U)*(256-V)
//   param2_16 = 256*U - U*V                 = U*(256-V)
//   param3_16 = 256*V - U*V                 = V*(256-U)
//   param4_16 = U*V
wire [16:0] param1_16 = 17'h10000 - {u_ff3[7:0],8'b0} - {v_ff3[7:0],8'b0} + u_dec_x_v_dec_ff1;
wire [16:0] param2_16 = {u_ff3[7:0],8'b0} - u_dec_x_v_dec_ff1;
wire [16:0] param3_16 = {v_ff3[7:0],8'b0} - u_dec_x_v_dec_ff1;
wire [16:0] param4_16 = u_dec_x_v_dec_ff1;

// Bilinear filtering is only needed when at least one coordinate has a
// non-zero fraction.
wire frac_nonzero = (u_ff3[7:0] != 0) || (v_ff3[7:0] != 0);

// An output beat is produced when a product is available (live or remembered
// from a stall) and the pipeline may advance.
wire out_valid = (mul_out_en || mul_out_en_ff1) && addr_translate_en;

// 8-bit weights: upper byte of the 16-bit weight, rounded with bit 7.
// The registers keep their previous value when both fractions are zero.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        begin
            param1 <= 8'b0;
            param2 <= 8'b0;
            param3 <= 8'b0;
            param4 <= 8'b0;
        end
    else if (frac_nonzero)
        begin
            param1 <= param1_16[15:8] + param1_16[7];
            param2 <= param2_16[15:8] + param2_16[7];
            param3 <= param3_16[15:8] + param3_16[7];
            param4 <= param4_16[15:8] + param4_16[7];
        end
end

// Texel addresses for all four neighbours.
//   block_addrN    = tex_addr + 8 * (texel_pos_N / 2)
//   in_block_addrN = 32 * texel_pos_N[0]
// i.e. two texels share one 8-address block and the LSB of the texel index
// selects which half of the block is used.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        begin
            block_addr0    <= 32'b0;
            in_block_addr0 <= 7'b0;
            block_addr1    <= 32'b0;
            in_block_addr1 <= 7'b0;
            block_addr2    <= 32'b0;
            in_block_addr2 <= 7'b0;
            block_addr3    <= 32'b0;
            in_block_addr3 <= 7'b0;
        end
    else if (addr_translate_en)
        begin
            block_addr0    <= tex_addr + ((texel_pos_0 >> 1) << 3);
            in_block_addr0 <= {1'b0, texel_pos_0[0], 5'b0};

            block_addr1    <= tex_addr + ((texel_pos_1 >> 1) << 3);
            in_block_addr1 <= {1'b0, texel_pos_1[0], 5'b0};

            block_addr2    <= tex_addr + ((texel_pos_2 >> 1) << 3);
            in_block_addr2 <= {1'b0, texel_pos_2[0], 5'b0};

            block_addr3    <= tex_addr + ((texel_pos_3 >> 1) << 3);
            in_block_addr3 <= {1'b0, texel_pos_3[0], 5'b0};
        end
end

// Output strobes.
//   addr_fifo_w_en[0] : address valid, high for one cycle per output beat
//   addr_fifo_w_en[1] : bilinear enable, refreshed on every output beat and
//                       held in between
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        addr_fifo_w_en <= 2'b00;
    else
        begin
            addr_fifo_w_en[0] <= out_valid;
            if (out_valid)
                addr_fifo_w_en[1] <= frac_nonzero;
        end
end

// The parameter FIFO is written in lock-step with the address FIFO.
assign param_fifo_w_en = addr_fifo_w_en[0];
assign param_en        = addr_fifo_w_en[1];

endmodule


